Objective

Test Gene2ModuleExpressionScores2 on Kali Immunity (Tran et all Immunity 2019) Dataset

#local path: "/Volumes/GoogleDrive/My Drive/Manuscripts/RNASeq Manuscript/Final Data Sets/Infected DGE Analysis edgeR Paired n71 7072 genes 2018-07-30 eset.rds"
#from google drive
temp <- tempfile(fileext = ".rds")
dl <- drive_download(
  as_id("1ex6BT-0sWSnrl7LNT0xoBItv2R2lqRPT"), path = temp, overwrite = TRUE)
## ! Using an auto-discovered, cached token.
##   To suppress this message, modify your code or options to clearly consent to
##   the use of a cached token.
##   See gargle's "Non-interactive auth" vignette for more details:
##   <https://gargle.r-lib.org/articles/non-interactive-auth.html>
## ℹ The googledrive package is using a cached token for 'tuantran@iu.edu'.
## File downloaded:
## • 'Infected DGE Analysis edgeR Paired n71 7072 genes 2018-07-30 eset.rds'
##   <id: 1ex6BT-0sWSnrl7LNT0xoBItv2R2lqRPT>
## Saved locally as:
## • '/var/folders/nc/wdrlkm350r57dvty_cc9cr68nljq2f/T//RtmpPVAEDO/file99072c41c613.rds'
paired_eset <- readRDS(file = dl$local_path)

Remake expression set and convert to cpm

# Make DGEList object

y  <- DGEList(counts=counts(paired_eset), genes= fData(paired_eset), group= paste(paired_eset$Class, paired_eset$Infection.Status, sep = "_"),remove.zeros=T)
## Removing 4584 rows with all zero counts
# tapply(y$samples$lib.siz, INDEX= y$samples$group, summary)
# plot(y$samples$group, y$samples$lib.siz)
# summary(y$samples$group)

##########
# Filter #
##########
y <- y[rowSums(cpm(y)>1) >= min(summary(y$samples$group)),] #Filter low expression tags: keep genes with at least 1 CPM in at least X, where X is the number of samples in the smallest group
y <- y[rowSums(is.na(y $genes))==0, ]       #Filter those with annotation (they all have annotation so this is optional)
dim(y)
## [1] 14677   142
o <- order(rowSums(y$counts))               #filter duplicates (chose transcript with highest overall count per user guide)
y <- y[o,]
d <- duplicated(y$genes$GeneSymbol)
y <- y[!d,]

y$samples$lib.size <- colSums(y$counts)
y <- calcNormFactors(y)                                             #Normalization

cpm_dat <- cpm(y, prior.count = 2,log=TRUE)

# create ExpressionSet and view data for moderated cpm 

fData_cpm <- featureData(paired_eset)[rownames(as.matrix(cpm_dat)),]
logcpm_eset <- new("ExpressionSet", phenoData = phenoData(paired_eset), exprs = as.matrix(cpm_dat), featureData = fData_cpm)
## Warning: `as.tibble()` was deprecated in tibble 2.0.0.
## Please use `as_tibble()` instead.
## The signature and semantics have changed, see `?as_tibble`.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was generated.
plot_dat <- pData(logcpm_eset) %>%
  left_join(., hiBTM_dat %>%
              t() %>%
              scale() %>%
              as.data.frame() %>%
              rownames_to_column(var = "xid"),
            by = "xid")

Clustering Heatmap of Samples using high-annotation level BTMs

library(pheatmap)

hiBTM_dat %>%
  t() %>%
  scale() %>%
  t() %>%
  pheatmap()

Clustering Heatmap of Samples using low-annotation level BTMs

library(pheatmap)

lowBTM_dat %>%
  t() %>%
  scale() %>%
  t() %>%
  pheatmap()